


*********************************************************
********** FORMAT HIGH SCHOOL ENROLLMENT FILES **********
*********************************************************



* Load the cleaned high school enrollment file and merge in the lottery
* applicant (voting) sample on student ID.
use "$data_clean/full_hs_file.dta", clear
* keep(2 3): retain matched records and applicants that appear only in the
* using file; master-only enrollment records are discarded.
* The path uses a forward slash, which Stata accepts on every platform
* (a backslash separator only works on Windows) and which matches the
* other paths in this file.
merge m:1 sasid using "$data/lottery_applicants_voting.dta", keep(2 3) nogen
* Display IDs in full rather than in scientific notation.
format sasid %12.0f

keep sasid proj_year12 enstat grade t period year
* Relevant sample: projected 12th-grade years 2006-2021.
* inrange() is false for missing proj_year12, so those rows are dropped,
* exactly as with the explicit <= / >= comparison.
keep if inrange(proj_year12, 2006, 2021)
sort sasid t grade
* Remove records that are identical on every retained variable.
duplicates drop
  
* When a student has several records in the same period (sasid, t), drop
* the records whose status is neither enrolled ("01") nor graduated ("04"),
* unless the period is the student's last observed one.
* NOTE: this step potentially affects how transfers etc. are coded below.
* tag = number of additional records sharing the same (sasid, t); it is
* kept because a later section still uses it.
duplicates tag sasid t, gen(tag)
tab tag
bys sasid: egen last_t = max(t)
bys sasid: drop if tag > 0 & !inlist(enstat, "01", "04") & t < last_t
drop last_t
  
*Code status for exit
* Every record in a (sasid, t) group receives the same status: the
* highest-priority category matched by ANY record in that group.
* Priority (1 = highest) follows the order in which categories are listed.
gen byte prio = .
replace prio = 1 if enstat == "04"
replace prio = 2 if enstat == "06"
replace prio = 3 if enstat == "01"
* transfer codes are split over two inlist() calls to stay well inside the
* argument limit for string inlist()
replace prio = 4 if inlist(enstat, "02", "07", "08") | inlist(enstat, "20", "21", "22", "23", "24", "41")
replace prio = 5 if inlist(enstat, "10", "11")
replace prio = 6 if enstat == "09"
replace prio = 7 if inlist(enstat, "03", "30", "31", "32", "33", "34", "35", "36")
replace prio = 8 if enstat == "05"
replace prio = 9 if enstat == "40"

* Best (lowest) rank found within the student-period; missing when no
* record in the group matches any category.
bys sasid t: egen best_prio = min(prio)

* Labels listed in priority order; unmatched groups keep status == "".
gen status = ""
local k = 0
foreach lab in "graduated" "deceased" "enrolled" "transferred" "completed without grad" "max age" "dropout" "permanent exclusion" "not enrolled special service" {
	local ++k
	replace status = "`lab'" if best_prio == `k'
}
drop prio best_prio
	
*test if transfer grade and enroll grades are the same within student-periods
* Diagnostic only: no retained data are modified. Among (sasid, t) groups
* that had more than one record (tag > 0) and include an enrolled ("01")
* record, compare each record's grade with the group mean. A record whose
* grade differs from the mean signals that grades disagree within the group.
gen byte enrolled_rec = (enstat == "01") if tag > 0
bys sasid t: egen any_enrolled = max(enrolled_rec) if tag > 0
bys sasid t: egen mean_grade = mean(grade) if tag > 0 & any_enrolled == 1
* Report the number of records with an inconsistent grade (0 = all agree).
count if tag > 0 & any_enrolled == 1 & grade != mean_grade
drop enrolled_rec any_enrolled mean_grade tag
   
	 
	 ***************************************************************
	 ** the following set of code generates the variables we want **
	 ***************************************************************
	
	
duplicates drop
keep sasid proj_year12 grade t period year status

*gen graduation status vars
* Each hsgrad_* variable is a student-level flag: the maximum over all of
* the student's records of a record-level indicator.
gen byte is_grad = (status == "graduated")

* 4-year: graduated record in an "eoy" file dated no later than proj_year12.
gen gradind = is_grad & proj_year12 >= year & period == "eoy"
bys sasid: egen hsgrad_4yr = max(gradind)

* 5-year: any graduated record dated no later than proj_year12, or an "eoy"
* record dated no later than proj_year12 + 1. (A separate
* proj_year12 == year & period == "oct" clause is implied by the first
* condition and is therefore omitted.)
gen gradind2 = is_grad & ((proj_year12 >= year) | (proj_year12 + 1 >= year & period == "eoy"))
* Outcome not observable for the 2020 cohort; the same necessarily holds for
* any later cohort in the sample, so censor with >= rather than ==.
* NOTE(review): confirm the last available data year.
replace gradind2 = . if proj_year12 >= 2020
bys sasid: egen hsgrad_5yr = max(gradind2)

* 6-year: as above, plus an "oct" record dated proj_year12 + 1 or an "eoy"
* record dated no later than proj_year12 + 2.
gen gradind3 = is_grad & ((proj_year12 >= year) | (proj_year12 + 1 == year & period == "oct") | (proj_year12 + 2 >= year & period == "eoy"))
* Censored from the 2019 cohort onward, for the same reason as above.
replace gradind3 = . if proj_year12 >= 2019
bys sasid: egen hsgrad_6yr = max(gradind3)
drop is_grad gradind gradind2 gradind3

/*
*gen deceased status and transfer status 
bys sasid: gen tempt = t if (proj_year12 > year)|(proj_year12 == year & period == "eoy")
bys sasid: egen maxt = max(tempt)

gen decind_ontime = (status == "deceased" & maxt ==t)
bys sasid: egen deceased_ontime = max(decind_ontime) 
 
gen transfind_ontime = (status=="transferred" & maxt == t) 
bys sasid: egen transferred_ontime = max(transfind_ontime) 
drop tempt maxt decind_ontime transfind_ontime

bys sasid: gen tempt2 = t if (proj_year12 >= year)|(proj_year12==year & period =="oct")|(proj_year12 +1 >= year & period=="eoy")
bys sasid: egen maxt2 = max(tempt2)

gen decind_within2 = (status == "deceased" & maxt ==t)
bys sasid: egen deceased_withintwo = max(decind_within2) 
 
gen transfind_within2 = (status=="transferred" & maxt == t) 
bys sasid: egen transferred_withintwo = max(transfind_within2) 
drop tempt maxt decind_within2 transfind_within2
*/
*SRC edit
*actual graduation date
* gradyear is the file year of a graduated record; missing on other records.
gen int gradyear = year if status == "graduated"
* Define HS graduation date as June 30th of that year. mdy() builds the
* daily date directly (and returns missing when gradyear is missing), so no
* string conversion round-trip is needed.
gen hsgraddate = mdy(6, 30, gradyear)
* Show the value as a calendar date rather than a raw day count.
format hsgraddate %td

* Reduce to student-level variables (hsgrad* also matches hsgraddate).
keep sasid gradyear hsgraddate proj_year12 hsgrad*
duplicates drop

*deal with dups
*these are expected due to multiple hits in the file

* Pass 1: for students with more than one remaining row, discard the rows
* that carry no graduation date.
* NOTE(review): a student with several distinct rows and no graduation date
* on any of them loses ALL rows here -- confirm this cannot occur (e.g.
* proj_year12 varying within sasid).
duplicates tag sasid, gen(n_extra)
tab n_extra
drop if n_extra != 0 & hsgraddate == .
drop n_extra

* a few dups remain -- take earlier grad date
duplicates tag sasid, gen(n_extra)
tab n_extra
bys sasid: egen first_gradyear = min(gradyear) if gradyear != .
drop if gradyear != . & gradyear != first_gradyear
drop n_extra first_gradyear
*egen transf_deceased_ontime = rowmax(deceased_ontime transferred_ontime)
*egen transf_deceased_withintwo = rowmax(deceased_withintwo transferred_withintwo)


save "$data/hsgrad_voting.dta", replace


		

